In [ ]:
%run "../Functions/2. Google form analysis.ipynb"
(userIdThatDidNotAnswer) (userId1AnswerEN) (userIdAnswersEN) (userId1ScoreEN) (userIdScoresEN) (userId1AnswerFR) (userIdAnswersFR) (userId1ScoreFR) (userIdScoresFR) (userIdAnswersENFR)
In [ ]:
# Responders of the Google form, as returned by getAllResponders()
# (presumably defined in the notebook loaded by the %run line above -- confirm).
allGFormResponders = getAllResponders()
In [ ]:
# Print every form responder that has more than two game sessions,
# followed by that number of sessions.
for uid in allGFormResponders:
    # Look the sessions up once per user instead of once for the test
    # and a second time for the printout.
    sessionCount = len(getUserSessions(rmdf1522, uid))
    if sessionCount > 2:
        print(uid + ' ' + str(sessionCount))
In [ ]:
# Inspect the container type returned by getAllResponders().
type(allGFormResponders)
In [ ]:
# Number of entries in that container.
len(allGFormResponders)
In [ ]:
# Distinct responder ids and how many there are.
allUniqueGFormResponders = pd.Series(allGFormResponders).unique()
len(allUniqueGFormResponders)
In [ ]:
# Count the form answers per player id, keep the ids that occur more than
# once, and store those ids (the index of the counts) as a Series.
allMultipleGFormResponders = pd.Series(gform[localplayerguidkey]).value_counts()
allMultipleGFormResponders = allMultipleGFormResponders[allMultipleGFormResponders > 1]
allMultipleGFormResponders = pd.Series(allMultipleGFormResponders.index)
#allMultipleGFormResponders
In [ ]:
# User id used for the manual checks below.
# Note that the string literally contains the surrounding double quotes.
testUserID = '"bfdfd356-5d6f-4696-a2f1-c1dc338aa64b"' # 4 sessions
In [ ]:
#testUserID = '"a7936587-8b71-43b6-9c61-17b2c2b55de3"' # 2 sessions
In [ ]:
# Form timestamps of all answers whose player id equals the test user id.
gformTimestamps = gform[gform[localplayerguidkey]==testUserID][QTimestamp]
In [ ]:
# Form timestamps greater than '2017/10/15', in ascending order.
gform[gform[QTimestamp] > '2017/10/15'][QTimestamp].sort_values()
In [ ]:
# Parsing experiments on a timestamp string carrying a 'GMT+1' suffix:
# default parsing first ...
pd.to_datetime('2017/10/16 2:14:12 PM GMT+1')
In [ ]:
# ... then with utc=True ...
pd.to_datetime('2017/10/16 2:14:12 PM GMT+1', utc=True)
In [ ]:
# ... then with the zone suffix removed.
pd.to_datetime('2017/10/16 2:14:12 PM')
In [ ]:
# Same experiment on an ISO 8601 string with a 'Z' suffix: default parsing ...
pd.to_datetime('2017-11-02T11:08:03.813Z')
In [ ]:
# ... and with utc=True.
pd.to_datetime('2017-11-02T11:08:03.813Z', utc=True)
In [ ]:
# userTime values lying strictly between 14:37 and 14:38 UTC on 2017-10-26,
# sorted in ascending order.
rmdf1522\
[(rmdf1522['userTime']>pd.Timestamp('2017-10-26T14:37', tz='UTC'))\
& (rmdf1522['userTime']<pd.Timestamp('2017-10-26T14:38', tz='UTC'))]\
['userTime'].sort_values()
#.dropna(subset=['userTime'])
In [ ]:
#rmdf1522.query("('2017-10-24T08:15:11' < userTime < '2017-10-24T08:15:22')")
In [ ]:
# First five userTime values later than 2017-10-16T11:58:03 UTC.
rmdf1522[rmdf1522['userTime']>pd.Timestamp('2017-10-16T11:58:03', tz='UTC')]['userTime'].sort_values().head()
firstGameTime='2017-10-16 12:06:39.217000+00:00' dateGform='2017-10-16 14:04:27+00:00' dateGform='2017-10-16 15:22:25+00:00' firstGameTime='2017-10-16 11:58:03.987000+00:00' dateGform='2017-10-16 13:55:31+00:00' dateGform='2017-10-16 15:06:32+00:00'
67603 2017-10-16T12:06:39.217Z 18 2017/10/16 1:04:27 PM GMT+1 23 2017/10/16 2:22:25 PM GMT+1
66989 2017-10-16T11:58:03.987Z
13 2017/10/16 12:55:31 PM GMT+1
59 2017/10/16 2:06:32 PM GMT+1
In [ ]:
# Raw timestamp strings for two samples (same values as in the listing just
# above this cell): an ISO 8601 'Z' time (rmfirst*) and two 'GMT+1' times
# (gfa1before*, gfa1after*) for each sample.
rmfirst1 = '2017-10-16T12:06:39.217Z'
gfa1before1 = '2017/10/16 1:04:27 PM GMT+1'
gfa1after1 = '2017/10/16 2:22:25 PM GMT+1'
rmfirst2 = '2017-10-16T11:58:03.987Z'
gfa1before2 = '2017/10/16 12:55:31 PM GMT+1'
gfa1after2 = '2017/10/16 2:06:32 PM GMT+1'
In [ ]:
# Parse the six sample strings with pandas' default settings.
dt_rmfirst1 = pd.to_datetime(rmfirst1)
dt_gfa1before1 = pd.to_datetime(gfa1before1)
dt_gfa1after1 = pd.to_datetime(gfa1after1)
dt_rmfirst2 = pd.to_datetime(rmfirst2)
dt_gfa1before2 = pd.to_datetime(gfa1before2)
dt_gfa1after2 = pd.to_datetime(gfa1after2)
In [ ]:
# NOTE(review): this cell is an exact duplicate of the previous one.
dt_rmfirst1 = pd.to_datetime(rmfirst1)
dt_gfa1before1 = pd.to_datetime(gfa1before1)
dt_gfa1after1 = pd.to_datetime(gfa1after1)
dt_rmfirst2 = pd.to_datetime(rmfirst2)
dt_gfa1before2 = pd.to_datetime(gfa1before2)
dt_gfa1after2 = pd.to_datetime(gfa1after2)
In [ ]:
# Timezone-aware variants: the 'Z' strings are parsed with utc=True, the
# 'GMT+1' strings are parsed and then localized to 'Europe/Berlin'.
# NOTE(review): despite the 'utc' suffix, the gfa* values are localized to
# Europe/Berlin, not converted to UTC; tz_localize also requires the parsed
# value to be timezone-naive -- confirm with the pandas version in use.
dt_rmfirst1utc = pd.to_datetime(rmfirst1, utc=True)
dt_gfa1before1utc = pd.to_datetime(gfa1before1).tz_localize('Europe/Berlin')
dt_gfa1after1utc = pd.to_datetime(gfa1after1).tz_localize('Europe/Berlin')
dt_rmfirst2utc = pd.to_datetime(rmfirst2, utc=True)
dt_gfa1before2utc = pd.to_datetime(gfa1before2).tz_localize('Europe/Berlin')
dt_gfa1after2utc = pd.to_datetime(gfa1after2).tz_localize('Europe/Berlin')
In [ ]:
# Show the default-parsed values ...
[dt_rmfirst1,dt_gfa1before1,dt_gfa1after1,dt_rmfirst2,dt_gfa1before2,dt_gfa1after2]
In [ ]:
# ... and the timezone-aware ones.
[dt_rmfirst1utc,dt_gfa1before1utc,dt_gfa1after1utc,dt_rmfirst2utc,dt_gfa1before2utc,dt_gfa1after2utc]
In [ ]:
# For each sample: is the rmfirst time after the 'before' time and before the 'after' time?
[dt_rmfirst1utc>dt_gfa1before1utc,dt_rmfirst1utc<dt_gfa1after1utc,dt_rmfirst2utc>dt_gfa1before2utc,dt_rmfirst2utc<dt_gfa1after2utc]
In [ ]:
# Parse an ISO 8601 'Z' string with utc=True and display the result.
stamp = pd.to_datetime('2017-11-02T11:08:03.813Z', utc=True)
stamp
In [ ]:
# Parse the same string without utc=True and check the resulting type.
stamp = pd.to_datetime('2017-11-02T11:08:03.813Z')
type(stamp)
In [ ]:
#stamp = stamp.tz_localize('America/Sao_Paulo')
# Attach the GMT zone, then express the same instant in US/Eastern.
# NOTE(review): tz_localize expects a timezone-naive value -- confirm that the
# parse above yields one with the pandas version in use.
stamp = stamp.tz_localize('GMT')
new_stamp = stamp.tz_convert('US/Eastern')
new_stamp
In [ ]:
stamp
In [ ]:
new_stamp
In [ ]:
# Print each form timestamp selected earlier for the test user.
for timestamp in gformTimestamps:
    print(timestamp)
In [ ]:
# Sessions of the test user and their number.
sessions = getUserSessions(rmdf1522, testUserID)
len(sessions)
In [ ]:
sessions
In [ ]:
# Scratch version of getFirstEventDate( _userId, _rmDF = rmdf1522 ):
# find the earliest 'userTime' among the user's events that have a 'section'.
# _userId is assumed to be in RedMetrics format.
# The result, _firstGameTime, is a pandas Timestamp; it keeps the sentinel
# value Timestamp.max (UTC) when no such event is found.
_userId = 'bfdfd356-5d6f-4696-a2f1-c1dc338aa64b'
_rmDF = rmdf1522

_sessions = getUserSessions(_rmDF, _userId)
_firstGameTime = pd.Timestamp.max.tz_localize('utc')

for session in _sessions:
    # events of this session, minus those without a 'section'
    # (irrelevant events like 'start' and 'configure' ones)
    _timedEvents = _rmDF[_rmDF['sessionId']==session].dropna(subset=['section'])
    if len(_timedEvents) == 0:
        continue
    _earliest = _timedEvents['userTime'].min()
    if _earliest < _firstGameTime:
        _firstGameTime = _earliest

_firstGameTime
In [ ]:
# Scratch version of getBoundingEventDates( _userId, _rmDF = rmdf1522 ):
# find the earliest and latest 'userTime' among the user's events that have
# a 'section'.
# _userId is assumed to be in RedMetrics format.
# The result is a pair of pandas Timestamps; each one keeps its sentinel
# (Timestamp.max / Timestamp.min, UTC) when no such event is found.
_userId = 'bfdfd356-5d6f-4696-a2f1-c1dc338aa64b'
_rmDF = rmdf1522

_sessions = getUserSessions(_rmDF, _userId)
_firstGameTime = pd.Timestamp.max.tz_localize('utc')
_lastGameTime = pd.Timestamp.min.tz_localize('utc')

for session in _sessions:
    # events of this session, minus those without a 'section'
    # (irrelevant events like 'start' and 'configure' ones)
    _timedEvents = _rmDF[_rmDF['sessionId']==session].dropna(subset=['section'])
    if len(_timedEvents) == 0:
        continue
    _earliest = _timedEvents['userTime'].min()
    _latest = _timedEvents['userTime'].max()
    if _earliest < _firstGameTime:
        _firstGameTime = _earliest
    if _latest > _lastGameTime:
        _lastGameTime = _latest

(_firstGameTime,_lastGameTime)
In [ ]:
# For each form answer of _userId, print its timestamp and then the
# temporality label obtained by comparing it with _firstGameTime:
# entry 0 when strictly earlier, entry 1 when strictly later, entry 2 otherwise.
for timestamp in gform[gform[localplayerguidkey]==_userId][QTimestamp]:
    print(timestamp)
    if timestamp < _firstGameTime:
        temporalityRank = 0
    elif timestamp > _firstGameTime:
        temporalityRank = 1
    else:
        temporalityRank = 2
    print(answerTemporalities[temporalityRank])
In [ ]:
# Form timestamps of the answers whose player id equals _userId.
gform[gform[localplayerguidkey]==_userId][QTimestamp]
In [ ]:
# For every player id that answered the form more than once, print the first
# and last game event times, then one line per form answer giving the answer
# time converted to UTC and the temporality label derived from it.
_noGameEvent = pd.Timestamp.max.tz_localize('utc')
for userId in allMultipleGFormResponders:
    firstGameTime, lastGameTime = getBoundingEventDates(userId)
    debugLines = [
        '\n\n\nuid=' + str(userId),
        'first= ' + str(firstGameTime),
        'last= ' + str(lastGameTime),
    ]
    gformTimestamps = gform[gform[localplayerguidkey]==userId][QTimestamp]
    for dateGform in gformTimestamps:
        # Google forms Timestamps are GMT+1
        dateGform = dateGform.tz_convert('utc')
        if firstGameTime == _noGameEvent:
            # sentinel value: no game event time was found for this user
            temporalityRank = 2
        elif dateGform <= firstGameTime:
            temporalityRank = 0
        elif dateGform > firstGameTime:
            temporalityRank = 1
        else:
            temporalityRank = 2
        debugLines.append(str(dateGform) + '=>' + str(answerTemporalities[temporalityRank]))
    strDebug = '\n'.join(debugLines)
    print(strDebug)
In [ ]:
# Spot checks: player ids stored in individual rows of the form table.
gform[localplayerguidkey][12]
In [ ]:
gform[localplayerguidkey][50]
In [ ]:
gform[localplayerguidkey][50]
In [ ]:
# First event date computed for the player id of row 50.
getFirstEventDate(gform[localplayerguidkey][50])
In [ ]:
# Sessions found for the player id of row 3.
getUserSessions(rmdf1522, gform[localplayerguidkey][3])
In [ ]:
#for index in gform.index:
# print(index)
In [ ]:
# Timestamp, player id and temporality of individual rows.
gform[QTimestamp][0]
In [ ]:
gform.loc[0][localplayerguidkey]
In [ ]:
print(gform[QTemporality][0])
In [ ]:
print(gform.loc[6,QTemporality])
In [ ]:
# Call setAnswerTemporalities on the form table (defined outside this
# notebook section; presumably fills the temporality column -- confirm).
setAnswerTemporalities(gform)
In [ ]:
def getFuncHasSameDate(thisDate):
    """Build a predicate that tells whether a timestamp falls on thisDate.

    The returned callable takes any object exposing a ``date()`` method and
    returns whether ``timestamp.date() == thisDate``.
    """
    return lambda timestamp: timestamp.date() == thisDate
In [ ]:
def associateUserIdWithGFAnswer(candidateUserId, surveyAnswerIndex, surveyAnswerTemporality):
    """Link a candidate user id to one survey answer if that id is still free.

    candidateUserId: user id to store in the 'userId' column.
    surveyAnswerIndex: row label of the answer in _gformDFWithExtraColumn.
    surveyAnswerTemporality: value to store in the QTemporality column.

    Returns whether the row ends up holding exactly this user id and this
    temporality.
    """
    # `value in series` tests the index labels of a pandas Series, not the
    # values it stores, so the 'userId' values are checked explicitly.
    if _gformDFWithExtraColumn['userId'].isin([candidateUserId]).any():
        print("candidate " + candidateUserId + " already in use")
    else:
        # candidate userId is available:
        # associate this candidate userId to the answer
        _gformDFWithExtraColumn.loc[surveyAnswerIndex, 'userId'] = candidateUserId
        _gformDFWithExtraColumn.loc[surveyAnswerIndex, QTemporality] = surveyAnswerTemporality
    return ((_gformDFWithExtraColumn.loc[surveyAnswerIndex, 'userId'] == candidateUserId)\
        and (_gformDFWithExtraColumn.loc[surveyAnswerIndex, QTemporality] == surveyAnswerTemporality))
In [ ]:
# progress widget updated while candidates are being examined
_associateUserIdsWithGFAnswerProgress = FloatProgress(min=0, max=10)
def associateUserIdsWithGFAnswer(candidateUserIds, surveyAnswerIndex, surveyAnswerTemporality, isConsoleVerbose = False):
    """Link the first still-unused candidate user id to one survey answer.

    candidateUserIds: candidate user ids, tried in iteration order.
    surveyAnswerIndex: row label of the answer in _gformDFWithExtraColumn.
    surveyAnswerTemporality: value stored in the QTemporality column on success.
    isConsoleVerbose: print a trace of the decisions.

    Returns True as soon as a candidate has been written to the row, False
    when there is no candidate or all of them are already in use.
    """
    if len(candidateUserIds) == 0:
        if isConsoleVerbose:
            print("no candidate userId")
        return False

    _associateUserIdsWithGFAnswerProgress.max = len(candidateUserIds)
    _associateUserIdsWithGFAnswerProgress.value = 0
    _associateUserIdsWithGFAnswerProgress.description='associateUserIdsWithGFAnswer ' + str(surveyAnswerIndex) + ' progress:'

    # `value in series` tests the index labels of a pandas Series, not its
    # values, so the ids already in use are collected from the values.
    # The table is not modified before the function returns, so building
    # this once is enough.
    userIdsInUse = set(_gformDFWithExtraColumn['userId'])

    for candidate in candidateUserIds:
        _associateUserIdsWithGFAnswerProgress.value += 1
        if candidate not in userIdsInUse:
            if isConsoleVerbose:
                print("candidate " + candidate + " available")
            # candidate userId is available:
            # associate this candidate userId to the answer
            _gformDFWithExtraColumn.loc[surveyAnswerIndex, 'userId'] = candidate
            _gformDFWithExtraColumn.loc[surveyAnswerIndex, QTemporality] = surveyAnswerTemporality
            return True
        if isConsoleVerbose:
            print("candidate " + candidate + " already in use")
    return False
In [ ]:
# Work on a copy of the form table that carries an extra 'userId' column.
_gformDFWithExtraColumn = gform.copy()

# initialization of 'userId' column:
# rows whose temporality is the third label get the placeholder 0
# (the user id needs to be found), the other rows reuse the player id
# stored in the form.
undefinedIndices = _gformDFWithExtraColumn[QTemporality] == answerTemporalities[2]
_gformDFWithExtraColumn.loc[undefinedIndices, 'userId'] = 0
_gformDFWithExtraColumn.loc[~undefinedIndices, 'userId'] = _gformDFWithExtraColumn.loc[~undefinedIndices, localplayerguidkey]

# number of rows overall and per temporality label
totalCount = len(_gformDFWithExtraColumn)
beforeCounts, afterCounts, undefinedCounts = [
    len(_gformDFWithExtraColumn[_gformDFWithExtraColumn[QTemporality] == answerTemporalities[rank]])
    for rank in (0, 1, 2)
]
print("\n".join([
    "totalCount=" + str(totalCount),
    "beforeCounts=" + str(beforeCounts),
    "afterCounts=" + str(afterCounts),
    "undefinedCounts=" + str(undefinedCounts),
    "",
]))

timestamp = pd.Timestamp.min
#surveyAnswer = _gformDFWithExtraColumn.loc[0,:]
In [ ]:
#for answerIndex in _gformDFWithExtraColumn.index:
# if(_gformDFWithExtraColumn.loc[answerIndex,QTemporality] == answerTemporalities[2]):
# print("answerIndex="+str(answerIndex))
# print(_gformDFWithExtraColumn.loc[answerIndex,localplayerguidkey])
# print(_gformDFWithExtraColumn.loc[answerIndex,QTimestamp])
# #surveyAnswer = _gformDFWithExtraColumn.loc[answerIndex,:]
# break
#timestamp = _gformDFWithExtraColumn.loc[answerIndex,QTimestamp]
#timestamp.date()
#rmdf1522.loc[0,'userTime'].date() == timestamp.date()
In [ ]:
#timestamp = _gformDFWithExtraColumn.loc[29,QTimestamp]
#timestamp
In [ ]:
#eventsOnThisDay['userTime'].dropna()
In [ ]:
# inputs:
# answerIndex
#
# output:
# _gformDFWithExtraColumn[answerIndex, 'userId'] may be set
# _gformDFWithExtraColumn[answerIndex, QTemporality] may be set
#
answerIndex = 29
_rmdf = rmdf1522
isConsoleVerbose = False
#
# 1st pass: 1 hour immediate time neighbors
# - 1h before and after, search for unlinked, matching, *unique*
# - if can only be pretest or posttest, so be it
# - if both possible, rely on survey answer
# 2nd pass:
# - 1h before and after, search for unlinked, matching, *closest*
# - as pretest, closest previous 'start'-type event with no later events and with no linked survey answer
# or closest series of events with no linked survey answer
# - as posttest, closest prior series of events with no linked survey answer
# 3rd pass:
# - 1 day before and after, search for unlinked, matching, closest
# 4th pass:
# - before and after, search for unlinked, matching, closest
_setAnswerTemporalityProgress = FloatProgress(min=0, max=5)


def _linkAnswerToCandidates(strictUserIds, looseUserIds, strictLabel, looseLabel,
                            answerIndex, temporality, passNumber, isConsoleVerbose):
    """Pick the candidate list allowed for this pass and try to link the answer to it."""
    if passNumber == 1:
        # the first pass only accepts a single, unambiguous strict candidate
        if len(strictUserIds) != 1:
            return
        candidates, label = strictUserIds, strictLabel
    elif len(strictUserIds) > 0:
        candidates, label = strictUserIds, strictLabel
    else:
        candidates, label = looseUserIds, looseLabel

    isLinked = associateUserIdsWithGFAnswer(candidates, answerIndex, temporality, isConsoleVerbose)
    if isConsoleVerbose:
        # on failure every candidate user id is already used by another survey answer
        print(("success: " if isLinked else "fail: ") + label)


def _searchUserIdForAnswer(answerIndex, _rmdf, passNumber, isConsoleVerbose):
    """Search the game events around one undefined answer for a matching user id."""
    timestamp = _gformDFWithExtraColumn.loc[answerIndex,QTimestamp]
    _setAnswerTemporalityProgress.value += 1

    # events within the time window allowed for this pass
    if (passNumber == 1 or passNumber == 2):
        # less than 1 hour before or after the answer
        eventsBounding = _rmdf[abs(_rmdf['userTime'] - timestamp) < datetime.timedelta(hours = 1)]
    elif (passNumber == 3):
        # less than 1 day before or after the answer
        eventsBounding = _rmdf[abs(_rmdf['userTime'] - timestamp) < datetime.timedelta(days = 1)]
    else:
        eventsBounding = _rmdf

    if len(eventsBounding) == 0:
        if isConsoleVerbose:
            print("no eventsBounding for user id '" + _gformDFWithExtraColumn.loc[answerIndex, localplayerguidkey] + "'")
        return
    if isConsoleVerbose:
        print("found eventsBounding for user id '" + _gformDFWithExtraColumn.loc[answerIndex, localplayerguidkey] + "'")

    _setAnswerTemporalityProgress.value += 1
    # closest events first on both sides of the answer
    eventsBefore = eventsBounding[(eventsBounding['userTime'] < timestamp)].sort_values(by='userTime', ascending=False)
    eventsAfter = eventsBounding[(eventsBounding['userTime'] > timestamp)].sort_values(by='userTime', ascending=True)
    if(len(eventsBefore) == 0):
        if isConsoleVerbose:
            print("no eventsBefore around " + str(timestamp.date()) + " for user id '" + _gformDFWithExtraColumn.loc[answerIndex, localplayerguidkey] + "'")
    if(len(eventsAfter) == 0):
        if isConsoleVerbose:
            print("no eventsAfter around " + str(timestamp.date()) + " for user id '" + _gformDFWithExtraColumn.loc[answerIndex, localplayerguidkey] + "'")

    _setAnswerTemporalityProgress.value += 1
    # explicit dtype: an empty Series without dtype triggers a pandas warning
    strictPotentialPretestUserIds = pd.Series(dtype=object)
    strictPotentialPosttestUserIds = pd.Series(dtype=object)

    # pretest candidates: users with a 'start' event before the answer;
    # strict ones also have events after the answer
    potentialPretestUserIds = pd.Series(eventsBefore[eventsBefore['type'] == 'start'].sort_values(by='userTime', ascending=False)['userId'].unique())
    if(len(potentialPretestUserIds) > 0):
        strictPotentialPretestUserIds = potentialPretestUserIds[potentialPretestUserIds.isin(eventsAfter['userId'])]

    # posttest candidates: users with events before the answer that are not
    # pretest candidates; strict ones have no event after the answer
    # NOTE(review): the original comment spoke of "events after the survey
    # answer" while the code reads eventsBefore -- confirm the intent.
    potentialPosttestUserIds = pd.Series(eventsBefore[~(eventsBefore['userId'].isin(potentialPretestUserIds))].sort_values(by='userTime', ascending=True)['userId'].unique())
    if(len(potentialPosttestUserIds) > 0):
        strictPotentialPosttestUserIds = pd.Series(potentialPosttestUserIds[~potentialPosttestUserIds.isin(eventsAfter['userId'])].unique())
    _setAnswerTemporalityProgress.value += 1

    # remove userIds that are already linked to a survey answer
    linkedUserIds = _gformDFWithExtraColumn['userId']
    potentialPretestUserIds = potentialPretestUserIds[~potentialPretestUserIds.isin(linkedUserIds)]
    strictPotentialPretestUserIds = strictPotentialPretestUserIds[~strictPotentialPretestUserIds.isin(linkedUserIds)]
    potentialPosttestUserIds = potentialPosttestUserIds[~potentialPosttestUserIds.isin(linkedUserIds)]
    strictPotentialPosttestUserIds = strictPotentialPosttestUserIds[~strictPotentialPosttestUserIds.isin(linkedUserIds)]

    # booleans describing the type of survey answer
    isPotentialPretest = len(potentialPretestUserIds) > 0
    isPotentialPosttest = len(potentialPosttestUserIds) > 0
    _setAnswerTemporalityProgress.value += 1

    if(isPotentialPretest and not isPotentialPosttest):
        # definitely a pretest in any case.
        # which userId is linked to this survey answer?
        _linkAnswerToCandidates(
            strictPotentialPretestUserIds, potentialPretestUserIds,
            "strictPotentialPretestUserIds", "potentialPretestUserIds",
            answerIndex, answerTemporalities[0], passNumber, isConsoleVerbose)
    elif (isPotentialPosttest and not isPotentialPretest):
        # definitely a posttest in any case.
        # which userId is linked to this survey answer?
        _linkAnswerToCandidates(
            strictPotentialPosttestUserIds, potentialPosttestUserIds,
            "strictPotentialPosttestUserIds", "potentialPosttestUserIds",
            answerIndex, answerTemporalities[1], passNumber, isConsoleVerbose)
    else:
        # pretest or posttest?
        if isConsoleVerbose:
            print("couldn\'t determine pretest or posttest")


def setAnswerTemporality(answerIndex, _rmdf = rmdf1522, passNumber = 1, isConsoleVerbose = False):
    """Try to find the user id and temporality of one undefined survey answer.

    answerIndex: row label of the answer in _gformDFWithExtraColumn.
    _rmdf: table of game events; the 'userTime', 'type' and 'userId' columns are read.
    passNumber: 1 or 2 searches events less than 1 hour away from the answer,
        3 less than 1 day away, any other value all events; pass 1
        additionally requires a single strict candidate.
    isConsoleVerbose: print a trace of the decisions.

    Nothing is returned; on success the 'userId' and QTemporality cells of the
    row are written through associateUserIdsWithGFAnswer. Rows whose
    temporality is not answerTemporalities[2] are left untouched.
    """
    _setAnswerTemporalityProgress.description='setAnswerTemporality ' + str(answerIndex) + ' progress:'
    if isConsoleVerbose:
        print("\n\nsetAnswerTemporality(" + str(answerIndex) + ")")

    # test if already set
    # (the column is addressed through QTemporality, like everywhere else in
    # this file, instead of a hard-coded column name)
    if _gformDFWithExtraColumn.loc[answerIndex, QTemporality] != answerTemporalities[2]:
        if isConsoleVerbose:
            print("temporality already set to '" + _gformDFWithExtraColumn.loc[answerIndex, QTemporality] + "'")
    else:
        _searchUserIdForAnswer(answerIndex, _rmdf, passNumber, isConsoleVerbose)

    if isConsoleVerbose:
        print("end temporality=" + _gformDFWithExtraColumn.loc[answerIndex, QTemporality])
In [ ]:
# Count the game events recorded on the day of answer 29 and on the days
# just before and after it.
answerIndex = 29
timestamp = _gformDFWithExtraColumn.loc[answerIndex,QTimestamp]
oneDay = datetime.timedelta(days = 1)
timestampPrevDay = timestamp.date() - oneDay
timestampNextDay = timestamp.date() + oneDay
print(str((timestampPrevDay, timestamp.date(), timestampNextDay)))

# for each of the three days: events whose userTime falls on that day,
# without the columns that are entirely empty
eventsOnPrevDay, eventsOnThisDay, eventsOnNextDay = [
    rmdf1522[rmdf1522['userTime'].apply(getFuncHasSameDate(day))].dropna(axis=1, how='all')
    for day in (timestampPrevDay, timestamp.date(), timestampNextDay)
]
print((len(eventsOnPrevDay),len(eventsOnThisDay),len(eventsOnNextDay)))
In [ ]:
#eventsOnPrevDay['userTime'].max(), eventsOnNextDay['userTime'].min()
In [ ]:
# progress widget advanced once per survey answer during a pass
_passProgress = FloatProgress(min=0, max=10)
def applyPass(passNumber):
    """Run setAnswerTemporality with the given pass number on every answer
    of _gformDFWithExtraColumn whose temporality is answerTemporalities[2]."""
    _passProgress.description = 'Pass {} progress:'.format(passNumber)
    _passProgress.max = len(_gformDFWithExtraColumn.index)
    _passProgress.value = 0
    for rowLabel in _gformDFWithExtraColumn.index:
        _passProgress.value += 1
        # answers that already have a temporality are skipped
        if _gformDFWithExtraColumn.loc[rowLabel, QTemporality] != answerTemporalities[2]:
            continue
        setAnswerTemporality(rowLabel, _rmdf=rmdf1522, passNumber=passNumber)
In [ ]:
_gformDFWithExtraColumn = gform.copy()
def resetGFormWithExtraColumn():
    """Rebuild the global _gformDFWithExtraColumn from gform.

    The copy receives a 'userId' column: 0 for rows whose temporality is
    answerTemporalities[2], the form's player id for all other rows.
    The row counts, overall and per temporality label, are printed.
    """
    global _gformDFWithExtraColumn
    freshCopy = gform.copy()

    # initialization of 'userId' column
    isUndefined = freshCopy[QTemporality] == answerTemporalities[2]
    # rows for which the user id needs to be found
    freshCopy.loc[isUndefined, 'userId'] = 0
    # rows for which the user id is known
    freshCopy.loc[~isUndefined, 'userId'] = freshCopy.loc[~isUndefined, localplayerguidkey]
    _gformDFWithExtraColumn = freshCopy

    def countRowsLabelled(label):
        return len(freshCopy[freshCopy[QTemporality] == label])

    summary = "totalCount=" + str(len(freshCopy))
    summary += "\nbeforeCounts=" + str(countRowsLabelled(answerTemporalities[0]))
    summary += "\nafterCounts=" + str(countRowsLabelled(answerTemporalities[1]))
    summary += "\nundefinedCounts=" + str(countRowsLabelled(answerTemporalities[2]))
    summary += "\n"
    print(summary)
    #timestamp = pd.Timestamp.min
    #surveyAnswer = _gformDFWithExtraColumn.loc[0,:]
In [ ]:
# Apply passes 1 to stepsCount to the undefined survey answers. Each pass is
# repeated until it no longer changes the number of undefined answers.
# `result` gathers the temporality counts of the form and after each pass.
resetGFormWithExtraColumn()

def _countUndefinedAnswers():
    # value_counts() has no entry for a label that no longer occurs, so
    # indexing it raises KeyError once every answer is resolved; counting
    # the matching rows yields 0 in that case.
    return int((_gformDFWithExtraColumn[QTemporality] == answerTemporalities[2]).sum())

# defined before the test below so that the final display always has a value
result = pd.DataFrame(data=gform[QTemporality].value_counts())

if answerTemporalities[2] in gform[QTemporality].values:
    _undefinedCount = IntText(0, description='undefined count:')
    _whileCount = IntText(0, description='while count:')
    display(_undefinedCount)
    display(_whileCount)

    stepsCount = 4
    __progress = FloatProgress(min=0, max=stepsCount, description='Pass count:')
    display(__progress)
    display(_passProgress)
    display(_setAnswerTemporalityProgress)
    display(_associateUserIdsWithGFAnswerProgress)

    for passNumber in range(1, stepsCount + 1):
        previousValue = 0 #gform[QTemporality].value_counts()[answerTemporalities[2]]
        _whileCount.value = 0
        while (previousValue != _countUndefinedAnswers()):
            _whileCount.value += 1
            previousValue = _countUndefinedAnswers()
            _undefinedCount.value = previousValue
            applyPass(passNumber)
        print("applied pass " + str(passNumber) + " " + str(_whileCount.value) + " times")
        __progress.value += 1
        result['pass ' + str(passNumber)] = _gformDFWithExtraColumn[QTemporality].value_counts()
result
In [ ]:
#def getExtendedTemporality( answerDate, firstGameEventDate ):
#result = answerTemporalities[2]
#if(gameEventDate != pd.Timestamp.max.tz_localize('utc')):
# if(answerDate <= gameEventDate):
# result = answerTemporalities[0]
# elif (answerDate > gameEventDate):
# result = answerTemporalities[1]
#else:
# # search for a close-enough session, even if it doesn't belong to the user
# # check that this user doesn't already have survey answers
#
# # search for games starting just after
# # pretests are more frequent than posttests
# eventsAfter = rmdf1522[rmdf1522['serverTime']]
#
# # search for games ending just before
#
# # search for any overlapping game
#
# # search for any overlapping game
#
#
#result
# Assign a temporality to every form answer, keeping at most one answer per
# user with the first label and one with the second; answers that lose that
# slot get the third label.
# Only runs when the temporality column holds a single distinct value.
# NOTE(review): _gformDF is not defined in the visible cells; this looks like
# the body of a function taking it as a parameter -- confirm.
if(len(_gformDF[QTemporality].unique()) == 1):
    # format : key = _userId, value = [_firstEventDate, None or _gformDF.index of before, None or _gformDF.index of after]
    # None (rather than 0) marks "no answer yet", because 0 can be a valid row label.
    temporalities = {}
    for _index in _gformDF.index:
        _userId = _gformDF.loc[_index,localplayerguidkey]
        if _userId in temporalities:
            _firstEventDate, beforeIndex, afterIndex = temporalities[_userId]
        else:
            _firstEventDate, beforeIndex, afterIndex = getFirstEventDate(_userId), None, None

        temporality = getTemporality(_gformDF.loc[_index,QTimestamp],_firstEventDate)

        if temporality == answerTemporalities[0] and beforeIndex is not None:
            # a 'before' answer is already recorded for this user: keep the later one
            if _gformDF.loc[_index,QTimestamp] > _gformDF.loc[beforeIndex,QTimestamp]:
                _gformDF.loc[beforeIndex,QTemporality] = answerTemporalities[2]
            else:
                temporality = answerTemporalities[2]
        elif temporality == answerTemporalities[1] and afterIndex is not None:
            # an 'after' answer is already recorded for this user: keep the earlier one
            if _gformDF.loc[_index,QTimestamp] < _gformDF.loc[afterIndex,QTimestamp]:
                _gformDF.loc[afterIndex,QTemporality] = answerTemporalities[2]
            else:
                temporality = answerTemporalities[2]

        _gformDF.loc[_index,QTemporality] = temporality
        if temporality == answerTemporalities[0]:
            beforeIndex = _index
        elif temporality == answerTemporalities[1]:
            afterIndex = _index
        temporalities[_userId] = [_firstEventDate, beforeIndex, afterIndex]
    print("temporalities set")